# install.packages(renv)
# renv::init()
# renv::install("reticulate")
# renv::use_python()
#
# py_pkgs <- c(
# "scanpy",
# "anndata",
# "mofapy2"
# )
#
# reticulate::py_install(py_pkgs)
#
# BiocManager::install(c("SingleCellExperiment", "scran", "batchelor", "scater"))
# install.packages(c("patchwork"))
renv::activate()
* Project '/nfs/team205/ed6/bin/Pan_fetal_immune/src/5_organ_signatures/MOFA_factor_analysis' loaded. [renv 0.12.5]
suppressPackageStartupMessages({
library(tidyverse)
library(MOFA2)
library(Matrix)
library(SingleCellExperiment)
library(scran)
library(glue)
library(scater)
library(patchwork)
library(batchelor)
library(rhdf5)
# library(ggraph)
}
)
[0;1;31mSystem has not been booted with systemd as init system (PID 1). Can't operate.[0m
[0;1;31mFailed to create bus connection: Host is down[0m
running command 'timedatectl' had status 1package ‘ggplot2’ was built under R version 4.0.5package ‘tibble’ was built under R version 4.0.5package ‘tidyr’ was built under R version 4.0.5package ‘readr’ was built under R version 4.0.5package ‘dplyr’ was built under R version 4.0.5package ‘matrixStats’ was built under R version 4.0.5
Define plotting utils
remove_x_axis <- function(){
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
}
remove_y_axis <- function(){
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(), axis.title.y = element_blank())
}
org_colors <- read_csv("~/Pan_fetal_immune/metadata/organ_colors.csv")
New names:
* `` -> ...1
[1m[1mRows: [1m[22m[34m[34m9[34m[39m [1m[1mColumns: [1m[22m[34m[34m3[34m[39m
[36m──[39m [1m[1mColumn specification[1m[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (2): organ, color
[32mdbl[39m (1): ...1
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set [38;5;235m[48;5;253m[38;5;235m[48;5;253m`show_col_types = FALSE`[48;5;253m[38;5;235m[49m[39m to quiet this message.
org_colors <- setNames(org_colors$color, org_colors$organ)
figdir <- "~/mount/gdrive/Pan_fetal/Updates_and_presentations/figures/MOFA_analysis_MYELOID/"
if (!dir.exists(figdir)){ dir.create(figdir) }
split = "MYELOID"
indir <- glue("/nfs/team205/ed6/data/Fetal_immune/LMM_data/LMM_input_{split}_PBULK/")
matrix <- readMM(file = paste0(indir, "matrix.mtx.gz"))
coldata <- read.csv(file = paste0(indir, "metadata.csv.gz")) %>%
column_to_rownames("X")
rowdata <- read.csv(file = paste0(indir, "gene.csv.gz"))
## Make SingleCellExperiment obj
sce <- SingleCellExperiment(list(logcounts = t(matrix)), colData = coldata)
rownames(sce) <- make.unique(rowdata$GeneName)
## Plot number of cells per organ/celltype pair
n_cells_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
summarise(n_cells=sum(n_cells)) %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=log10(n_cells))) +
geom_text(aes(label=n_cells), color="white") +
scale_fill_viridis_c() +
theme_classic(base_size = 16) +
xlab("celltype") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
n_samples_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
summarise(n_samples=n()) %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=n_samples)) +
geom_text(aes(label=n_samples), color="white") +
scale_fill_viridis_c(option="cividis") +
theme_classic(base_size = 16) +
xlab("celltype") +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
n_cells_heatmap / n_samples_heatmap
## Filter out samples with less than 20 cells
sce <- sce[,sce$n_cells > 20]
# Exclude celltypes present in just one organ
keep_ct <- data.frame(colData(sce)) %>%
dplyr::select(organ, anno_lvl_2_final_clean) %>%
distinct() %>%
group_by(anno_lvl_2_final_clean) %>%
summarise(n=n()) %>%
ungroup() %>%
filter(n > 1) %>%
pull(anno_lvl_2_final_clean)
sce <- sce[,sce$anno_lvl_2_final_clean %in% keep_ct]
# Filter out celltypes with less than 10 samples
keep_ct <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean) %>%
summarise(n_samples=n()) %>%
filter(n_samples >= 10) %>%
pull(anno_lvl_2_final_clean)
sce <- sce[,sce$anno_lvl_2_final_clean %in% keep_ct]
## Exclude low quality clusters
anno_groups <- jsonlite::fromJSON(txt = "~/Pan_fetal_immune/metadata/anno_groups.json")
sce <- sce[,!sce$anno_lvl_2_final_clean %in% anno_groups$OTHER]
## Exclude donor F19 (low Q)
sce <- sce[,!sce$donor %in% c('F19')]
## Plot number of cells per organ/celltype pair
n_cells_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
summarise(n_cells=sum(n_cells)) %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=log10(n_cells))) +
geom_text(aes(label=n_cells), color="white") +
scale_fill_viridis_c() +
theme_classic(base_size = 16) +
xlab("celltype") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank())
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
n_samples_heatmap <- data.frame(colData(sce)) %>%
group_by(anno_lvl_2_final_clean, organ) %>%
summarise(n_samples=n()) %>%
ggplot(aes(anno_lvl_2_final_clean, organ)) +
geom_tile(aes(fill=n_samples)) +
geom_text(aes(label=n_samples), color="white") +
scale_fill_viridis_c(option="cividis") +
theme_classic(base_size = 16) +
xlab("celltype") +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5))
`summarise()` has grouped output by 'anno_lvl_2_final_clean'. You can override using the `.groups` argument.
n_cells_heatmap / n_samples_heatmap
## Save order of CTs from widespread to restricted
levels(pl$data$anno_lvl_2_final_clean)
[1] "DC2" "CD16+_MACROPHAGE"
[3] "CD14_MONO" "PROLIFERATING_MACROPHAGE"
[5] "CD14+_MACROPHAGE" "DC3"
[7] "DC1" "EO_BASO_MAST"
[9] "LMPP_ELP" "OLFML3+_MICROGLIA"
[11] "YS_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[13] "PROMONOCYTE_(PROLIFERATING)" "BM_CD14_MONO"
[15] "HSC_MPP" "PRE_PRO_B"
[17] "MEMP" "CMP"
[19] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CYCLING_MPP"
[21] "GMP" "NEUTROPHIL"
[23] "SPLENIC_MACROPHAGE" "PROMONOCYTE"
## Feature selection w scran WITHIN CELLTYPE
anno_groups <- split(colnames(sce), sce$anno_lvl_2_final_clean)
all_hvgs <- c()
for (i in anno_groups){
dec <- modelGeneVar(sce[,i])
hvgs <- getTopHVGs(dec, n = 1000)
all_hvgs <- union(all_hvgs, hvgs)
}
sce <- sce[which(rowSums(logcounts(sce)) > 0),]
sce
class: SingleCellExperiment
dim: 29568 1030
metadata(0):
assays(1): logcounts
rownames(29568): TSPAN6 TNMD ... AP000646.1 AP006216.3
rowData names(0):
colnames(1030): F45_SK_CD45P_FCAImmP7579224-F45-SK-DC3-12-5GEX F45_SK_CD45P_FCAImmP7579224-F45-SK-DC2-12-5GEX ...
F50_SP_CD45P_FCAImmP7803020-F50-SP-CD14_MONO-15-5GEX F30_TH_CD45N_FCAImmP7277565-F30-TH-DC1-14-3GEX
colData names(7): Sample donor ... method n_cells
reducedDimNames(0):
altExpNames(0):
EDA with PCA
sce <- runPCA(sce, scale=TRUE, ncomponents=30,
exprs_values = "logcounts", subset_row=all_hvgs)
plotPCA(sce, colour_by="donor", ncomponents=6)
plotPCA(sce, colour_by="method", ncomponents=6)
plotPCA(sce, colour_by="organ", ncomponents=10)
Minimize obvious technical effects (3GEX/5GEX, donor) using linear regression (following procedure from OSCA)
## Regress technical effects
design <- model.matrix(~donor+method,data=colData(sce))
residuals <- regressBatches(sce, assay.type = "logcounts", design = design)
assay(sce, "corrected_logcounts") <- as.matrix(assay(residuals[,colnames(sce)], "corrected"))
## Regress organ (soup effect)
design <- model.matrix(~organ,data=colData(sce)) ## Include organ term to capture soup
residuals <- regressBatches(sce, assay.type = "corrected_logcounts", design = design)
assay(sce, "corrected_logcounts") <- as.matrix(assay(residuals[,colnames(sce)], "corrected"))
Check regression has an effect repeating PCA
sce <- runPCA(sce, scale=TRUE, ncomponents=30, exprs_values = "corrected_logcounts")
plotPCA(sce, colour_by="method", ncomponents=6)
plotPCA(sce, colour_by="donor", ncomponents=6)
plotPCA(sce, colour_by="organ", ncomponents=8)
## Feature selection w scran WITHIN CELLTYPE
anno_groups <- split(colnames(sce), sce$anno_lvl_2_final_clean)
all_hvgs <- c()
for (i in anno_groups){
dec <- modelGeneVar(sce[,i], assay.type = "corrected_logcounts")
hvgs <- getTopHVGs(dec, n = 1000)
all_hvgs <- union(all_hvgs, hvgs)
}
Make MOFA object (Use celltypes as grouping covariate)
str_replace(sce$anno_lvl_2_final_clean, "/","_")
[1] "DC3" "DC2" "PROLIFERATING_MACROPHAGE"
[4] "EO_BASO/MAST" "CD14_MONO" "CD14+_MACROPHAGE"
[7] "DC3" "PROLIFERATING_MACROPHAGE" "CYCLING_MPP"
[10] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[13] "CD14+_MACROPHAGE" "KUPFFER_RP_MACROPHAGE" "DC3"
[16] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC2" "CD16+_MACROPHAGE"
[19] "EO_BASO/MAST" "CD14+_MACROPHAGE" "CD14_MONO"
[22] "DC1" "SPLENIC_MACROPHAGE" "DC2"
[25] "DC1" "CD14_MONO" "CD14+_MACROPHAGE"
[28] "EO_BASO/MAST" "EO_BASO/MAST" "CYCLING_MPP"
[31] "CMP" "MEMP" "PRE_PRO_B"
[34] "KUPFFER_RP_MACROPHAGE" "HSC_MPP" "OLFML3+_MICROGLIA"
[37] "CD16+_MACROPHAGE" "CD14+_MACROPHAGE" "CD14+_MACROPHAGE"
[40] "DC1" "DC3" "DC2"
[43] "EO_BASO/MAST" "CD14_MONO" "CD14+_MACROPHAGE"
[46] "CD16+_MACROPHAGE" "CD14+_MACROPHAGE" "DC3"
[49] "CMP" "EO_BASO/MAST" "CYCLING_MPP"
[52] "GMP" "PRE_PRO_B" "PROMONOCYTE_(PROLIFERATING)"
[55] "HSC_MPP" "PROMONOCYTE" "BM_CD14_MONO"
[58] "KUPFFER_RP_MACROPHAGE" "EO_BASO/MAST" "GMP"
[61] "CYCLING_MPP" "MEMP" "DC3"
[64] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CMP" "HSC_MPP"
[67] "DC1" "CD16+_MACROPHAGE" "PRE_PRO_B"
[70] "PROMONOCYTE_(PROLIFERATING)" "DC2" "CD14_MONO"
[73] "PROLIFERATING_MACROPHAGE" "LMPP_ELP" "CD14+_MACROPHAGE"
[76] "BM_CD14_MONO" "DC3" "PROLIFERATING_MACROPHAGE"
[79] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CYCLING_MPP" "CD16+_MACROPHAGE"
[82] "KUPFFER_RP_MACROPHAGE" "CD14+_MACROPHAGE" "CD14_MONO"
[85] "OLFML3+_MICROGLIA" "HSC_MPP" "DC2"
[88] "EO_BASO/MAST" "LMPP_ELP" "KUPFFER_RP_MACROPHAGE"
[91] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC2" "DC3"
[94] "CD16+_MACROPHAGE" "CD14_MONO" "CD14_MONO"
[97] "DC3" "PROMONOCYTE_(PROLIFERATING)" "DC1"
[100] "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE" "DC2"
[103] "CD16+_MACROPHAGE" "BM_CD14_MONO" "CMP"
[106] "EO_BASO/MAST" "HSC_MPP" "BM_CD14_MONO"
[109] "OLFML3+_MICROGLIA" "DC3" "DC1"
[112] "PROLIFERATING_MACROPHAGE" "DC2" "CD14+_MACROPHAGE"
[115] "CD14_MONO" "EO_BASO/MAST" "CD16+_MACROPHAGE"
[118] "DC3" "DC1" "DC2"
[121] "CD16+_MACROPHAGE" "PROLIFERATING_MACROPHAGE" "EO_BASO/MAST"
[124] "CD14+_MACROPHAGE" "CD14_MONO" "BM_CD14_MONO"
[127] "CMP" "DC3" "GMP"
[130] "PROMONOCYTE_(PROLIFERATING)" "PROMONOCYTE" "PRE_PRO_B"
[133] "BM_CD14_MONO" "EO_BASO/MAST" "NEUTROPHIL"
[136] "SPLENIC_MACROPHAGE" "DC1" "CD14_MONO"
[139] "DC2" "CD14+_MACROPHAGE" "CD16+_MACROPHAGE"
[142] "CMP" "CYCLING_MPP" "GMP"
[145] "EO_BASO/MAST" "HSC_MPP" "MEMP"
[148] "PRE_PRO_B" "OLFML3+_MICROGLIA" "DC3"
[151] "PROLIFERATING_MACROPHAGE" "EO_BASO/MAST" "PROMONOCYTE_(PROLIFERATING)"
[154] "CD14+_MACROPHAGE" "BM_CD14_MONO" "CD14_MONO"
[157] "DC2" "CD16+_MACROPHAGE" "CD16+_MACROPHAGE"
[160] "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE" "CYCLING_MPP"
[163] "CMP" "EO_BASO/MAST" "HSC_MPP"
[166] "MEMP" "CD14+_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[169] "DC3" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC2"
[172] "CD16+_MACROPHAGE" "CD14_MONO" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[175] "KUPFFER_RP_MACROPHAGE" "DC3" "DC1"
[178] "CD16+_MACROPHAGE" "EO_BASO/MAST" "DC2"
[181] "CD14_MONO" "CD14+_MACROPHAGE" "YS_MACROPHAGE"
[184] "BM_CD14_MONO" "PROLIFERATING_MACROPHAGE" "OLFML3+_MICROGLIA"
[187] "DC3" "CD14_MONO" "DC2"
[190] "CD14+_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC3"
[193] "KUPFFER_RP_MACROPHAGE" "CYCLING_MPP" "GMP"
[196] "HSC_MPP" "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST"
[199] "CD16+_MACROPHAGE" "DC1" "PRE_PRO_B"
[202] "CMP" "CD14_MONO" "MEMP"
[205] "DC2" "PROLIFERATING_MACROPHAGE" "LMPP_ELP"
[208] "CD14+_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC3"
[211] "KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE" "DC2"
[214] "CD14_MONO" "DC2" "CD16+_MACROPHAGE"
[217] "DC3" "DC1" "PROMONOCYTE_(PROLIFERATING)"
[220] "CD14+_MACROPHAGE" "OLFML3+_MICROGLIA" "PROLIFERATING_MACROPHAGE"
[223] "EO_BASO/MAST" "DC2" "CD14_MONO"
[226] "DC3" "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE"
[229] "CD16+_MACROPHAGE" "EO_BASO/MAST" "KUPFFER_RP_MACROPHAGE"
[232] "CD14_MONO" "OLFML3+_MICROGLIA" "DC2"
[235] "LMPP_ELP" "PROLIFERATING_MACROPHAGE" "CD16+_MACROPHAGE"
[238] "CD14+_MACROPHAGE" "DC2" "DC1"
[241] "DC3" "CD14+_MACROPHAGE" "DC2"
[244] "CD14_MONO" "PROLIFERATING_MACROPHAGE" "MEMP"
[247] "NEUTROPHIL" "DC3" "DC1"
[250] "PROLIFERATING_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[253] "CD16+_MACROPHAGE" "CD14+_MACROPHAGE" "SPLENIC_MACROPHAGE"
[256] "EO_BASO/MAST" "DC2" "YS_MACROPHAGE"
[259] "CD14_MONO" "PROMONOCYTE_(PROLIFERATING)" "DC3"
[262] "CMP" "PRE_PRO_B" "GMP"
[265] "CD16+_MACROPHAGE" "EO_BASO/MAST" "KUPFFER_RP_MACROPHAGE"
[268] "BM_CD14_MONO" "PROMONOCYTE" "CD14_MONO"
[271] "NEUTROPHIL" "DC1" "CYCLING_MPP"
[274] "CMP" "HSC_MPP" "EO_BASO/MAST"
[277] "CD14+_MACROPHAGE" "OLFML3+_MICROGLIA" "DC3"
[280] "CD14+_MACROPHAGE" "CD14_MONO" "PROLIFERATING_MACROPHAGE"
[283] "LMPP_ELP" "DC2" "MEMP"
[286] "DC3" "DC1" "CD14_MONO"
[289] "DC2" "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE"
[292] "EO_BASO/MAST" "CD16+_MACROPHAGE" "DC1"
[295] "EO_BASO/MAST" "CD14_MONO" "DC2"
[298] "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE" "CD16+_MACROPHAGE"
[301] "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE"
[304] "DC2" "CD14+_MACROPHAGE" "CD14_MONO"
[307] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC3" "KUPFFER_RP_MACROPHAGE"
[310] "YS_MACROPHAGE" "PROLIFERATING_MACROPHAGE" "DC2"
[313] "CD16+_MACROPHAGE" "CD14+_MACROPHAGE" "DC3"
[316] "PROLIFERATING_MACROPHAGE" "YS_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[319] "CD14+_MACROPHAGE" "CD16+_MACROPHAGE" "CD14+_MACROPHAGE"
[322] "DC1" "DC1" "DC3"
[325] "KUPFFER_RP_MACROPHAGE" "DC1" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[328] "CD16+_MACROPHAGE" "EO_BASO/MAST" "DC2"
[331] "SPLENIC_MACROPHAGE" "CD14_MONO" "PROLIFERATING_MACROPHAGE"
[334] "BM_CD14_MONO" "CD14+_MACROPHAGE" "CMP"
[337] "EO_BASO/MAST" "GMP" "HSC_MPP"
[340] "BM_CD14_MONO" "DC3" "DC1"
[343] "CD14_MONO" "DC2" "CYCLING_MPP"
[346] "EO_BASO/MAST" "HSC_MPP" "CMP"
[349] "MEMP" "CD14_MONO" "CYCLING_MPP"
[352] "EO_BASO/MAST" "CMP" "PRE_PRO_B"
[355] "HSC_MPP" "MEMP" "DC3"
[358] "CMP" "GMP" "PROMONOCYTE_(PROLIFERATING)"
[361] "EO_BASO/MAST" "CD16+_MACROPHAGE" "PROMONOCYTE"
[364] "PRE_PRO_B" "BM_CD14_MONO" "CD14_MONO"
[367] "OLFML3+_MICROGLIA" "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE"
[370] "DC1" "DC2" "CD14_MONO"
[373] "LMPP_ELP" "LMPP_ELP" "EO_BASO/MAST"
[376] "KUPFFER_RP_MACROPHAGE" "DC3" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[379] "DC1" "HSC_MPP" "CD14+_MACROPHAGE"
[382] "PRE_PRO_B" "CD14_MONO" "CD16+_MACROPHAGE"
[385] "MEMP" "DC2" "DC3"
[388] "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD14+_MACROPHAGE"
[391] "YS_MACROPHAGE" "PROLIFERATING_MACROPHAGE" "CD16+_MACROPHAGE"
[394] "DC2" "CD14_MONO" "SPLENIC_MACROPHAGE"
[397] "DC2" "CD14_MONO" "PROMONOCYTE_(PROLIFERATING)"
[400] "CD16+_MACROPHAGE" "PROLIFERATING_MACROPHAGE" "DC3"
[403] "EO_BASO/MAST" "DC2" "CD14_MONO"
[406] "CD14+_MACROPHAGE" "DC3" "OLFML3+_MICROGLIA"
[409] "EO_BASO/MAST" "DC1" "CD14+_MACROPHAGE"
[412] "YS_MACROPHAGE" "DC2" "PROLIFERATING_MACROPHAGE"
[415] "CD14_MONO" "CD16+_MACROPHAGE" "CYCLING_MPP"
[418] "HSC_MPP" "EO_BASO/MAST" "CYCLING_MPP"
[421] "CMP" "EO_BASO/MAST" "HSC_MPP"
[424] "MEMP" "KUPFFER_RP_MACROPHAGE" "SPLENIC_MACROPHAGE"
[427] "DC1" "DC2" "CD14_MONO"
[430] "DC2" "CD14+_MACROPHAGE" "CD16+_MACROPHAGE"
[433] "DC3" "CMP" "EO_BASO/MAST"
[436] "GMP" "PRE_PRO_B" "PROMONOCYTE_(PROLIFERATING)"
[439] "CD16+_MACROPHAGE" "BM_CD14_MONO" "PROMONOCYTE"
[442] "NEUTROPHIL" "KUPFFER_RP_MACROPHAGE" "DC3"
[445] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE" "EO_BASO/MAST"
[448] "HSC_MPP" "MEMP" "DC2"
[451] "CD14_MONO" "CYCLING_MPP" "CMP"
[454] "CD16+_MACROPHAGE" "OLFML3+_MICROGLIA" "CD14+_MACROPHAGE"
[457] "MEMP" "CD16+_MACROPHAGE" "DC1"
[460] "DC2" "CYCLING_MPP" "DC3"
[463] "CMP" "EO_BASO/MAST" "HSC_MPP"
[466] "PRE_PRO_B" "MEMP" "CD14_MONO"
[469] "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC3"
[472] "CD16+_MACROPHAGE" "DC2" "CD14_MONO"
[475] "CMP" "PROMONOCYTE_(PROLIFERATING)" "DC3"
[478] "GMP" "PRE_PRO_B" "EO_BASO/MAST"
[481] "CD16+_MACROPHAGE" "PROMONOCYTE" "BM_CD14_MONO"
[484] "CD14_MONO" "NEUTROPHIL" "DC3"
[487] "PROMONOCYTE_(PROLIFERATING)" "PROLIFERATING_MACROPHAGE" "DC2"
[490] "DC1" "CD14+_MACROPHAGE" "CD14_MONO"
[493] "CD16+_MACROPHAGE" "BM_CD14_MONO" "DC1"
[496] "CD16+_MACROPHAGE" "DC2" "LMPP_ELP"
[499] "EO_BASO/MAST" "DC3" "CMP"
[502] "PROMONOCYTE_(PROLIFERATING)" "GMP" "PRE_PRO_B"
[505] "BM_CD14_MONO" "PROMONOCYTE" "CD14_MONO"
[508] "DC1" "CD16+_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[511] "DC3" "CD16+_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[514] "DC1" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[517] "DC2" "CD14_MONO" "BM_CD14_MONO"
[520] "MEMP" "EO_BASO/MAST" "PRE_PRO_B"
[523] "EO_BASO/MAST" "MEMP" "KUPFFER_RP_MACROPHAGE"
[526] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD14_MONO" "CD16+_MACROPHAGE"
[529] "DC2" "DC1" "CD16+_MACROPHAGE"
[532] "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE"
[535] "DC2" "CD14_MONO" "KUPFFER_RP_MACROPHAGE"
[538] "DC3" "CD16+_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[541] "CD14_MONO" "DC2" "CD14+_MACROPHAGE"
[544] "EO_BASO/MAST" "CMP" "PRE_PRO_B"
[547] "MEMP" "DC1" "CD16+_MACROPHAGE"
[550] "DC1" "SPLENIC_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[553] "CD16+_MACROPHAGE" "DC2" "CD14_MONO"
[556] "DC3" "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST"
[559] "DC1" "OLFML3+_MICROGLIA" "CD14+_MACROPHAGE"
[562] "DC2" "PROLIFERATING_MACROPHAGE" "CD14_MONO"
[565] "DC1" "DC2" "DC1"
[568] "SPLENIC_MACROPHAGE" "DC3" "CD14+_MACROPHAGE"
[571] "PROLIFERATING_MACROPHAGE" "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[574] "YS_MACROPHAGE" "CD16+_MACROPHAGE" "DC2"
[577] "EO_BASO/MAST" "CD14_MONO" "DC3"
[580] "PRE_PRO_B" "CMP" "GMP"
[583] "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST" "HSC_MPP"
[586] "CD16+_MACROPHAGE" "PROMONOCYTE" "BM_CD14_MONO"
[589] "CD14_MONO" "NEUTROPHIL" "CD14+_MACROPHAGE"
[592] "CD14_MONO" "CD16+_MACROPHAGE" "DC3"
[595] "EO_BASO/MAST" "PROMONOCYTE_(PROLIFERATING)" "PRE_PRO_B"
[598] "CD16+_MACROPHAGE" "BM_CD14_MONO" "PROMONOCYTE"
[601] "CD14_MONO" "CD16+_MACROPHAGE" "CD14+_MACROPHAGE"
[604] "DC2" "CD14+_MACROPHAGE" "PROLIFERATING_MACROPHAGE"
[607] "OLFML3+_MICROGLIA" "CD16+_MACROPHAGE" "DC3"
[610] "PROLIFERATING_MACROPHAGE" "SPLENIC_MACROPHAGE" "CD16+_MACROPHAGE"
[613] "PROMONOCYTE_(PROLIFERATING)" "CMP" "PRE_PRO_B"
[616] "DC1" "DC2" "GMP"
[619] "CD14+_MACROPHAGE" "CD14_MONO" "EO_BASO/MAST"
[622] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "BM_CD14_MONO" "KUPFFER_RP_MACROPHAGE"
[625] "DC3" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC1"
[628] "PROLIFERATING_MACROPHAGE" "CD16+_MACROPHAGE" "DC2"
[631] "CD14_MONO" "SPLENIC_MACROPHAGE" "BM_CD14_MONO"
[634] "EO_BASO/MAST" "CD14+_MACROPHAGE" "PROMONOCYTE_(PROLIFERATING)"
[637] "EO_BASO/MAST" "CYCLING_MPP" "CMP"
[640] "HSC_MPP" "CYCLING_MPP" "EO_BASO/MAST"
[643] "HSC_MPP" "MEMP" "DC1"
[646] "DC3" "CYCLING_MPP" "CMP"
[649] "EO_BASO/MAST" "HSC_MPP" "PRE_PRO_B"
[652] "PROMONOCYTE_(PROLIFERATING)" "BM_CD14_MONO" "DC1"
[655] "CMP" "DC3" "CD16+_MACROPHAGE"
[658] "PRE_PRO_B" "EO_BASO/MAST" "CMP"
[661] "CD16+_MACROPHAGE" "MEMP" "EO_BASO/MAST"
[664] "NEUTROPHIL" "CMP" "DC3"
[667] "CYCLING_MPP" "PROMONOCYTE_(PROLIFERATING)" "GMP"
[670] "PRE_PRO_B" "CD16+_MACROPHAGE" "EO_BASO/MAST"
[673] "PROMONOCYTE" "BM_CD14_MONO" "NEUTROPHIL"
[676] "CD14_MONO" "CMP" "MEMP"
[679] "HSC_MPP" "EO_BASO/MAST" "KUPFFER_RP_MACROPHAGE"
[682] "DC3" "DC2" "DC1"
[685] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE" "CD14_MONO"
[688] "DC3" "PROLIFERATING_MACROPHAGE" "CD14+_MACROPHAGE"
[691] "CD14_MONO" "DC2" "CD16+_MACROPHAGE"
[694] "CD16+_MACROPHAGE" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[697] "DC3" "PROLIFERATING_MACROPHAGE" "DC1"
[700] "PRE_PRO_B" "SPLENIC_MACROPHAGE" "EO_BASO/MAST"
[703] "DC2" "CD16+_MACROPHAGE" "CD14_MONO"
[706] "KUPFFER_RP_MACROPHAGE" "CYCLING_MPP" "DC3"
[709] "PROLIFERATING_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "KUPFFER_RP_MACROPHAGE"
[712] "CD16+_MACROPHAGE" "CD14+_MACROPHAGE" "CMP"
[715] "CYCLING_MPP" "EO_BASO/MAST" "GMP"
[718] "HSC_MPP" "MEMP" "CD14_MONO"
[721] "KUPFFER_RP_MACROPHAGE" "CYCLING_MPP" "CMP"
[724] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "PRE_PRO_B" "HSC_MPP"
[727] "DC3" "EO_BASO/MAST" "CD16+_MACROPHAGE"
[730] "MEMP" "DC3" "OLFML3+_MICROGLIA"
[733] "PROLIFERATING_MACROPHAGE" "DC1" "YS_MACROPHAGE"
[736] "CD16+_MACROPHAGE" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[739] "DC2" "LMPP_ELP" "CD14_MONO"
[742] "DC3" "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST"
[745] "BM_CD14_MONO" "PROMONOCYTE" "SPLENIC_MACROPHAGE"
[748] "DC1" "KUPFFER_RP_MACROPHAGE" "DC3"
[751] "DC2" "CD14_MONO" "CMP"
[754] "EO_BASO/MAST" "GMP" "PROMONOCYTE_(PROLIFERATING)"
[757] "PROMONOCYTE" "PROMONOCYTE" "DC3"
[760] "CMP" "GMP" "PRE_PRO_B"
[763] "CD16+_MACROPHAGE" "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST"
[766] "BM_CD14_MONO" "NEUTROPHIL" "CD14_MONO"
[769] "DC1" "DC3" "KUPFFER_RP_MACROPHAGE"
[772] "PROMONOCYTE_(PROLIFERATING)" "PRE_PRO_B" "CD16+_MACROPHAGE"
[775] "EO_BASO/MAST" "BM_CD14_MONO" "CD14+_MACROPHAGE"
[778] "CD14_MONO" "PROMONOCYTE" "GMP"
[781] "KUPFFER_RP_MACROPHAGE" "CMP" "DC3"
[784] "EO_BASO/MAST" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "DC2"
[787] "PROMONOCYTE_(PROLIFERATING)" "CD14_MONO" "CD16+_MACROPHAGE"
[790] "BM_CD14_MONO" "GMP" "PRE_PRO_B"
[793] "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST" "CMP"
[796] "PROMONOCYTE" "BM_CD14_MONO" "CD14_MONO"
[799] "DC3" "DC1" "CD14+_MACROPHAGE"
[802] "PROLIFERATING_MACROPHAGE" "DC2" "CD16+_MACROPHAGE"
[805] "EO_BASO/MAST" "CD14_MONO" "BM_CD14_MONO"
[808] "DC1" "DC3" "OLFML3+_MICROGLIA"
[811] "DC2" "CD14+_MACROPHAGE" "YS_MACROPHAGE"
[814] "PROLIFERATING_MACROPHAGE" "CD14_MONO" "CD16+_MACROPHAGE"
[817] "EO_BASO/MAST" "BM_CD14_MONO" "DC1"
[820] "OLFML3+_MICROGLIA" "DC3" "DC1"
[823] "PROLIFERATING_MACROPHAGE" "CD14_MONO" "DC2"
[826] "CD14+_MACROPHAGE" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[829] "DC3" "PROMONOCYTE_(PROLIFERATING)" "PRE_PRO_B"
[832] "PROLIFERATING_MACROPHAGE" "CD16+_MACROPHAGE" "EO_BASO/MAST"
[835] "BM_CD14_MONO" "PROMONOCYTE" "GMP"
[838] "CD14+_MACROPHAGE" "CD14_MONO" "EO_BASO/MAST"
[841] "CYCLING_MPP" "CMP" "HSC_MPP"
[844] "GMP" "PRE_PRO_B" "PROLIFERATING_MACROPHAGE"
[847] "CD14+_MACROPHAGE" "YS_MACROPHAGE" "DC2"
[850] "OLFML3+_MICROGLIA" "CD16+_MACROPHAGE" "CD14_MONO"
[853] "KUPFFER_RP_MACROPHAGE" "DC3" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[856] "DC1" "DC2" "CD16+_MACROPHAGE"
[859] "CD14+_MACROPHAGE" "PROLIFERATING_MACROPHAGE" "SPLENIC_MACROPHAGE"
[862] "CD14_MONO" "EO_BASO/MAST" "CYCLING_MPP"
[865] "CMP" "HSC_MPP" "CMP"
[868] "DC3" "DC2" "KUPFFER_RP_MACROPHAGE"
[871] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE" "DC1"
[874] "CD14_MONO" "CD14+_MACROPHAGE" "DC3"
[877] "KUPFFER_RP_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "EO_BASO/MAST"
[880] "PRE_PRO_B" "DC1" "CD16+_MACROPHAGE"
[883] "HSC_MPP" "SPLENIC_MACROPHAGE" "MEMP"
[886] "CD14_MONO" "DC2" "CD14+_MACROPHAGE"
[889] "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST" "CYCLING_MPP"
[892] "DC3" "CMP" "HSC_MPP"
[895] "DC3" "CYCLING_MPP" "CMP"
[898] "PROMONOCYTE_(PROLIFERATING)" "EO_BASO/MAST" "GMP"
[901] "PROMONOCYTE" "PRE_PRO_B" "HSC_MPP"
[904] "BM_CD14_MONO" "CD14_MONO" "DC1"
[907] "CYCLING_MPP" "CMP" "HSC_MPP"
[910] "EO_BASO/MAST" "MEMP" "PRE_PRO_B"
[913] "MEMP" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[916] "DC1" "KUPFFER_RP_MACROPHAGE" "DC2"
[919] "PROLIFERATING_KUPFFER_RP_MACROPHAGE" "YS_MACROPHAGE" "PROLIFERATING_MACROPHAGE"
[922] "SPLENIC_MACROPHAGE" "CD16+_MACROPHAGE" "CD14_MONO"
[925] "EO_BASO/MAST" "DC3" "KUPFFER_RP_MACROPHAGE"
[928] "SPLENIC_MACROPHAGE" "DC1" "PROLIFERATING_MACROPHAGE"
[931] "CD14_MONO" "PRE_PRO_B" "DC2"
[934] "CD16+_MACROPHAGE" "EO_BASO/MAST" "CD14+_MACROPHAGE"
[937] "DC1" "CD16+_MACROPHAGE" "DC1"
[940] "DC3" "PROLIFERATING_MACROPHAGE" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[943] "YS_MACROPHAGE" "CD16+_MACROPHAGE" "DC2"
[946] "KUPFFER_RP_MACROPHAGE" "CD14+_MACROPHAGE" "DC3"
[949] "CMP" "EO_BASO/MAST" "PROMONOCYTE_(PROLIFERATING)"
[952] "GMP" "CD16+_MACROPHAGE" "PRE_PRO_B"
[955] "PROMONOCYTE" "CD14_MONO" "BM_CD14_MONO"
[958] "PROLIFERATING_MACROPHAGE" "DC3" "CD14+_MACROPHAGE"
[961] "OLFML3+_MICROGLIA" "KUPFFER_RP_MACROPHAGE" "CD16+_MACROPHAGE"
[964] "CD14_MONO" "OLFML3+_MICROGLIA" "DC3"
[967] "DC1" "DC2" "PROLIFERATING_MACROPHAGE"
[970] "CD14+_MACROPHAGE" "CD14_MONO" "CD16+_MACROPHAGE"
[973] "KUPFFER_RP_MACROPHAGE" "DC3" "PROLIFERATING_KUPFFER_RP_MACROPHAGE"
[976] "CD14_MONO" "CD16+_MACROPHAGE" "DC2"
[979] "EO_BASO/MAST" "CMP" "HSC_MPP"
[982] "CYCLING_MPP" "DC3" "PRE_PRO_B"
[985] "GMP" "PROMONOCYTE" "DC2"
[988] "KUPFFER_RP_MACROPHAGE" "CD14_MONO" "PROMONOCYTE_(PROLIFERATING)"
[991] "CD16+_MACROPHAGE" "BM_CD14_MONO" "CMP"
[994] "PRE_PRO_B" "PROMONOCYTE_(PROLIFERATING)" "GMP"
[997] "PROMONOCYTE" "EO_BASO/MAST" "BM_CD14_MONO"
[1000] "NEUTROPHIL"
[ reached getOption("max.print") -- omitted 30 entries ]
object <- mofa_obj
Prepare 4 training
data_opts <- get_default_data_options(object)
data_opts$use_float32 <- TRUE
data_opts$center_groups <- FALSE
object@data_options <- data_opts
model_opts <- get_default_model_options(object)
model_opts$num_factors <- 30
# model_opts$ard_factors <- FALSE
train_opts <- get_default_training_options(object)
train_opts$seed <- 2020
train_opts$convergence_mode <- "medium" # use "fast" for faster training
train_opts$stochastic <- FALSE
# mefisto_opts <- get_default_mefisto_options(object)
# mefisto_opts$warping <- FALSE
# mefisto_opts$sparseGP <- TRUE
object <- prepare_mofa(
object = object,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
# Multi-group mode requested.
This is an advanced option, if this is the first time that you are running MOFA, we suggest that you try do some exploration first without specifying groups. Two important remarks:
- The aim of the multi-group framework is to identify the sources of variability *within* the groups. If your aim is to find a factor that 'separates' the groups, you DO NOT want to use the multi-group framework. Please see the FAQ on the MOFA2 webpage.
- It is important to account for the group effect before selecting highly variable features (HVFs). We suggest that either you calculate HVFs per group and then take the union, or regress out the group effect before HVF selection
Checking data options...
Due to string size limitations in the HDF5 format, sample names will be trimmed to less than 50 charactersChecking training options...
Checking model options...
object
Untrained MOFA model with the following characteristics:
Number of views: 1
Views names: corrected_logcounts
Number of features (per view): 6726
Number of groups: 24
Groups names: BM_CD14_MONO CD14_MONO CD14+_MACROPHAGE CD16+_MACROPHAGE CMP CYCLING_MPP DC1 DC2 DC3 EO_BASO_MAST GMP HSC_MPP KUPFFER_RP_MACROPHAGE LMPP_ELP MEMP NEUTROPHIL OLFML3+_MICROGLIA PRE_PRO_B PROLIFERATING_KUPFFER_RP_MACROPHAGE PROLIFERATING_MACROPHAGE PROMONOCYTE PROMONOCYTE_(PROLIFERATING) SPLENIC_MACROPHAGE YS_MACROPHAGE
Number of samples (per group): 35 86 71 89 44 29 60 76 80 86 25 32 47 10 26 10 20 37 36 48 20 33 18 12
Wrapped in run_mofa.R
mofa_trained
Error: object 'mofa_trained' not found
### Tweaking the MOFA2 loading function because the quality control complains
load_model <- function(file, sort_factors = TRUE, on_disk = FALSE, load_data = TRUE,
remove_outliers = FALSE, remove_inactive_factors = TRUE, verbose = FALSE,
load_interpol_Z = FALSE) {
# Create new MOFAodel object
object <- new("MOFA")
object@status <- "trained"
# Set on_disk option
if (on_disk) {
object@on_disk <- TRUE
} else {
object@on_disk <- FALSE
}
# Get groups and data set names from the hdf5 file object
h5ls.out <- h5ls(file, datasetinfo = FALSE)
########################
## Load training data ##
########################
# Load names
if ("views" %in% h5ls.out$name) {
view_names <- as.character( h5read(file, "views")[[1]] )
group_names <- as.character( h5read(file, "groups")[[1]] )
feature_names <- h5read(file, "features")[view_names]
sample_names <- h5read(file, "samples")[group_names]
} else { # for old models
feature_names <- h5read(file, "features")
sample_names <- h5read(file, "samples")
view_names <- names(feature_names)
group_names <- names(sample_names)
h5ls.out <- h5ls.out[grep("variance_explained", h5ls.out$name, invert = TRUE),]
}
if("covariates" %in% h5ls.out$name){
covariate_names <- as.character( h5read(file, "covariates")[[1]])
} else {
covariate_names <- NULL
}
# Load training data (as nested list of matrices)
data <- list(); intercepts <- list()
if (load_data && "data"%in%h5ls.out$name) {
object@data_options[["loaded"]] <- TRUE
if (verbose) message("Loading data...")
for (m in view_names) {
data[[m]] <- list()
intercepts[[m]] <- list()
for (g in group_names) {
if (on_disk) {
# as DelayedArrays
data[[m]][[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("data/%s/%s", m, g) ) )
} else {
# as matrices
data[[m]][[g]] <- h5read(file, sprintf("data/%s/%s", m, g) )
tryCatch(intercepts[[m]][[g]] <- as.numeric( h5read(file, sprintf("intercepts/%s/%s", m, g) ) ), error = function(e) { NULL })
}
# Replace NaN by NA
data[[m]][[g]][is.nan(data[[m]][[g]])] <- NA # this realised into memory, TO FIX
}
}
# Create empty training data (as nested list of empty matrices, with the correct dimensions)
} else {
object@data_options[["loaded"]] <- FALSE
for (m in view_names) {
data[[m]] <- list()
for (g in group_names) {
data[[m]][[g]] <- .create_matrix_placeholder(rownames = feature_names[[m]], colnames = sample_names[[g]])
}
}
}
object@data <- data
object@intercepts <- intercepts
# Load metadata if any
if ("samples_metadata" %in% h5ls.out$name) {
object@samples_metadata <- bind_rows(lapply(group_names, function(g) as.data.frame(h5read(file, sprintf("samples_metadata/%s", g)))))
}
if ("features_metadata" %in% h5ls.out$name) {
object@features_metadata <- bind_rows(lapply(view_names, function(m) as.data.frame(h5read(file, sprintf("features_metadata/%s", m)))))
}
# ############################
# ## Load sample covariates ##
# ############################
#
# if (any(grepl("cov_samples", h5ls.out$group))){
# covariates <- list()
# for (g in group_names) {
# if (on_disk) {
# # as DelayedArrays
# covariates[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("cov_samples/%s", g) ) )
# } else {
# # as matrices
# covariates[[g]] <- h5read(file, sprintf("cov_samples/%s", g) )
# }
# }
# } else covariates <- NULL
# object@covariates <- covariates
# if (any(grepl("cov_samples_transformed", h5ls.out$group))){
# covariates_warped <- list()
# for (g in group_names) {
# if (on_disk) {
# # as DelayedArrays
# covariates_warped[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("cov_samples_transformed/%s", g) ) )
# } else {
# # as matrices
# covariates_warped[[g]] <- h5read(file, sprintf("cov_samples_transformed/%s", g) )
# }
# }
# } else covariates_warped <- NULL
# object@covariates_warped <- covariates_warped
# #######################
# ## Load interpolated factor values ##
# #######################
#
# interpolated_Z <- list()
# if (isTRUE(load_interpol_Z)) {
#
# if (isTRUE(verbose)) message("Loading interpolated factor values...")
#
# for (g in group_names) {
# interpolated_Z[[g]] <- list()
# if (on_disk) {
# # as DelayedArrays
# # interpolated_Z[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("Z_predictions/%s", g) ) )
# } else {
# # as matrices
# tryCatch( {
# interpolated_Z[[g]][["mean"]] <- h5read(file, sprintf("Z_predictions/%s/mean", g) )
# }, error = function(x) { print("Predicitions of Z not found, not loading it...") })
# tryCatch( {
# interpolated_Z[[g]][["variance"]] <- h5read(file, sprintf("Z_predictions/%s/variance", g) )
# }, error = function(x) { print("Variance of predictions of Z not found, not loading it...") })
# tryCatch( {
# interpolated_Z[[g]][["new_values"]] <- h5read(file, "Z_predictions/new_values")
# }, error = function(x) { print("New values of Z not found, not loading it...") })
# }
# }
# }
# object@interpolated_Z <- interpolated_Z
#######################
## Load expectations ##
#######################
expectations <- list()
node_names <- h5ls.out[h5ls.out$group=="/expectations","name"]
if (verbose) message(paste0("Loading expectations for ", length(node_names), " nodes..."))
if ("AlphaW" %in% node_names)
expectations[["AlphaW"]] <- h5read(file, "expectations/AlphaW")[view_names]
if ("AlphaZ" %in% node_names)
expectations[["AlphaZ"]] <- h5read(file, "expectations/AlphaZ")[group_names]
if ("Sigma" %in% node_names)
expectations[["Sigma"]] <- h5read(file, "expectations/Sigma")
if ("Z" %in% node_names)
expectations[["Z"]] <- h5read(file, "expectations/Z")[group_names]
if ("W" %in% node_names)
expectations[["W"]] <- h5read(file, "expectations/W")[view_names]
if ("ThetaW" %in% node_names)
expectations[["ThetaW"]] <- h5read(file, "expectations/ThetaW")[view_names]
if ("ThetaZ" %in% node_names)
expectations[["ThetaZ"]] <- h5read(file, "expectations/ThetaZ")[group_names]
# if ("Tau" %in% node_names)
# expectations[["Tau"]] <- h5read(file, "expectations/Tau")
object@expectations <- expectations
########################
## Load model options ##
########################
if (verbose) message("Loading model options...")
tryCatch( {
object@model_options <- as.list(h5read(file, 'model_options', read.attributes = TRUE))
}, error = function(x) { print("Model options not found, not loading it...") })
# Convert True/False strings to logical values
for (i in names(object@model_options)) {
if (object@model_options[i] == "False" || object@model_options[i] == "True") {
object@model_options[i] <- as.logical(object@model_options[i])
} else {
object@model_options[i] <- object@model_options[i]
}
}
##########################################
## Load training options and statistics ##
##########################################
if (verbose) message("Loading training options and statistics...")
# Load training options
if (length(object@training_options) == 0) {
tryCatch( {
object@training_options <- as.list(h5read(file, 'training_opts', read.attributes = TRUE))
}, error = function(x) { print("Training opts not found, not loading it...") })
}
# Load training statistics
tryCatch( {
object@training_stats <- h5read(file, 'training_stats', read.attributes = TRUE)
object@training_stats <- h5read(file, 'training_stats', read.attributes = TRUE)
}, error = function(x) { print("Training stats not found, not loading it...") })
#############################
## Load covariates options ##
#############################
#
# if (any(grepl("cov_samples", h5ls.out$group))) {
# if (isTRUE(verbose)) message("Loading covariates options...")
# tryCatch( {
# object@mefisto_options <- as.list(h5read(file, 'smooth_opts', read.attributes = TRUE))
# }, error = function(x) { print("Covariates options not found, not loading it...") })
#
# # Convert True/False strings to logical values
# for (i in names(object@mefisto_options)) {
# if (object@mefisto_options[i] == "False" | object@mefisto_options[i] == "True") {
# object@mefisto_options[i] <- as.logical(object@mefisto_options[i])
# } else {
# object@mefisto_options[i] <- object@mefisto_options[i]
# }
# }
#
# }
#
#######################################
## Load variance explained estimates ##
#######################################
if ("variance_explained" %in% h5ls.out$name) {
r2_list <- list(
r2_total = h5read(file, "variance_explained/r2_total")[group_names],
r2_per_factor = h5read(file, "variance_explained/r2_per_factor")[group_names]
)
object@cache[["variance_explained"]] <- r2_list
}
# Hack to fix the problems where variance explained values range from 0 to 1 (%)
if (max(sapply(object@cache$variance_explained$r2_total,max,na.rm=TRUE),na.rm=TRUE)<1) {
for (m in 1:length(view_names)) {
for (g in 1:length(group_names)) {
object@cache$variance_explained$r2_total[[g]][[m]] <- 100 * object@cache$variance_explained$r2_total[[g]][[m]]
object@cache$variance_explained$r2_per_factor[[g]][,m] <- 100 * object@cache$variance_explained$r2_per_factor[[g]][,m]
}
}
}
##############################
## Specify dimensionalities ##
##############################
# Specify dimensionality of the data
object@dimensions[["M"]] <- length(data) # number of views
object@dimensions[["G"]] <- length(data[[1]]) # number of groups
object@dimensions[["N"]] <- sapply(data[[1]], ncol) # number of samples (per group)
object@dimensions[["D"]] <- sapply(data, function(e) nrow(e[[1]])) # number of features (per view)
# object@dimensions[["C"]] <- nrow(covariates[[1]]) # number of covariates
object@dimensions[["K"]] <- ncol(object@expectations$Z[[1]]) # number of factors
# Assign sample and feature names (slow for large matrices)
if (verbose) message("Assigning names to the different dimensions...")
# Create default features names if they are null
if (is.null(feature_names)) {
print("Features names not found, generating default: feature1_view1, ..., featureD_viewM")
feature_names <- lapply(seq_len(object@dimensions[["M"]]),
function(m) sprintf("feature%d_view_&d", as.character(seq_len(object@dimensions[["D"]][m])), m))
} else {
# Check duplicated features names
all_names <- unname(unlist(feature_names))
duplicated_names <- unique(all_names[duplicated(all_names)])
if (length(duplicated_names)>0)
warning("There are duplicated features names across different views. We will add the suffix *_view* only for those features
Example: if you have both TP53 in mRNA and mutation data it will be renamed to TP53_mRNA, TP53_mutation")
for (m in names(feature_names)) {
tmp <- which(feature_names[[m]] %in% duplicated_names)
if (length(tmp)>0) feature_names[[m]][tmp] <- paste(feature_names[[m]][tmp], m, sep="_")
}
}
features_names(object) <- feature_names
# Create default samples names if they are null
if (is.null(sample_names)) {
print("Samples names not found, generating default: sample1, ..., sampleN")
sample_names <- lapply(object@dimensions[["N"]], function(n) paste0("sample", as.character(seq_len(n))))
}
samples_names(object) <- sample_names
# Add covariates names
# if(!is.null(object@covariates)){
# # Create default covariates names if they are null
# if (is.null(covariate_names)) {
# print("Covariate names not found, generating default: covariate1, ..., covariateC")
# covariate_names <- paste0("sample", as.character(seq_len(object@dimensions[["C"]])))
# }
# covariates_names(object) <- covariate_names
# }
# Set views names
if (is.null(names(object@data))) {
print("Views names not found, generating default: view1, ..., viewM")
view_names <- paste0("view", as.character(seq_len(object@dimensions[["M"]])))
}
views_names(object) <- view_names
# Set groups names
if (is.null(names(object@data[[1]]))) {
print("Groups names not found, generating default: group1, ..., groupG")
group_names <- paste0("group", as.character(seq_len(object@dimensions[["G"]])))
}
groups_names(object) <- group_names
# Set factors names
factors_names(object) <- paste0("Factor", as.character(seq_len(object@dimensions[["K"]])))
###################
## Parse factors ##
###################
# Calculate variance explained estimates per factor
if (is.null(object@cache[["variance_explained"]])) {
object@cache[["variance_explained"]] <- calculate_variance_explained(object)
}
# Remove inactive factors
if (remove_inactive_factors) {
r2 <- rowSums(do.call('cbind', lapply(object@cache[["variance_explained"]]$r2_per_factor, rowSums, na.rm=TRUE)))
var.threshold <- 0.0001
if (all(r2 < var.threshold)) {
warning(sprintf("All %s factors were found to explain little or no variance so remove_inactive_factors option has been disabled.", length(r2)))
} else if (any(r2 < var.threshold)) {
object <- subset_factors(object, which(r2>=var.threshold))
message(sprintf("%s factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)", sum(r2 < var.threshold)))
}
}
# [Done in mofapy2] Sort factors by total variance explained
if (sort_factors && object@dimensions$K>1) {
# Sanity checks
if (verbose) message("Re-ordering factors by their variance explained...")
# Calculate variance explained per factor across all views
r2 <- rowSums(sapply(object@cache[["variance_explained"]]$r2_per_factor, function(e) rowSums(e, na.rm = TRUE)))
order_factors <- c(names(r2)[order(r2, decreasing = TRUE)])
# re-order factors
object <- subset_factors(object, order_factors)
}
# Mask outliers
if (remove_outliers) {
if (verbose) message("Removing outliers...")
object <- .detect_outliers(object)
}
# Mask intercepts for non-Gaussian data
if (any(object@model_options$likelihoods!="gaussian")) {
for (m in names(which(object@model_options$likelihoods!="gaussian"))) {
for (g in names(object@intercepts[[m]])) {
object@intercepts[[m]][[g]] <- NA
}
}
}
# ######################
# ## Quality controls ##
# ######################
#
# if (verbose) message("Doing quality control...")
# object <- .quality_control(object, verbose = verbose)
#
return(object)
}
mofa_trained <- load_model(outfile)
Recalculating total variance explained values (r2_total)...
9 factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)
samples_names(mofa_trained) <- samples_names(object)
samples_metadata(mofa_trained)
rownames(samples_metadata(mofa_trained)) <- samples_metadata(mofa_trained)[["sample"]]
plot_factor_cor(mofa_trained, method = "spearman")
Do factors relate to the number of cells in pseudobulk?
Distinguish technical factors by weight sparsity
exclude_factors
[1] Factor1 Factor2 Factor11 Factor13 Factor18 Factor20
21 Levels: Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7 Factor8 Factor9 Factor10 ... Factor21
Calculating Adjusted Mutual Information between organ identity and clustering of pseudobulks based on factor values
plot_factor_ordered <- function(mofa_trained, f){
factor_df <- get_factors(mofa_trained, factors = f, as.data.frame = TRUE) %>%
mutate(organ = sapply(str_split(sample, "_"), function(x) x[2])) %>%
group_by(group) %>%
mutate(gr_mean = median(value)) %>%
ungroup() %>%
arrange(gr_mean) %>%
mutate(group=factor(group, levels=unique(group)))
r2_df <- get_variance_explained(mofa_trained, factors = f, as.data.frame = TRUE)[[1]] %>%
filter(factor==paste0('Factor',f)) %>%
mutate(group=factor(group, levels = levels(factor_df$group)))
pl1 <- factor_df %>%
ggplot(aes(group, value)) +
geom_boxplot() +
geom_jitter(aes(color= organ), size=0.7) +
geom_hline(yintercept = 0, linetype=2) +
coord_flip() +
ylab(paste0("Factor ", f)) +
theme_bw(base_size = 14)
pl2 <- r2_df %>%
ggplot(aes(group, value)) +
geom_col() +
coord_flip() +
ylab("% variance explained") +
theme_bw(base_size = 14) +
remove_y_axis()
pl1 + pl2 + plot_layout(widths=c(2,1), guides="collect")
}
get_top_celltype_per_factor <- function(mofa_trained, f){
r2_df <- get_variance_explained(mofa_trained, factors = f, as.data.frame = TRUE)[[1]] %>%
filter(factor==paste0('Factor',f))
# mutate(group=factor(group, levels = ))
top_quant_r2 <- quantile(r2_df$value, probs = seq(0, 1, by = 0.2))["80%"]
top_groups <- r2_df$group[r2_df$value >= top_quant_r2]
return(top_groups)
}
save_factor_id <- function(mofa_trained, f, figdir){
## Order celltypes by factor values
p1 <- plot_factor_ordered(mofa_trained, f)
## Plot factor values across organs for celltypes with high variance explained
p2 <- plot_factor(mofa_trained, factors = f, groups = get_top_celltype_per_factor(mofa_trained, f), group_by = "group",
color_by = "organ",
dot_size = 2, dodge = TRUE
)
## Plot factor weights on genes
# plot_data_heatmap(mofa_trained, factor = f, nfeatures = 50, text_size = 3, show_colnames=FALSE,
# annotation_samples = c("organ", "time", "method", "donor"))
p3 <- plot_weights(mofa_trained, factors = f, nfeatures = 30, text_size = 3) +
scale_y_discrete(expand=c(0.1, 0.1))
full_pl <- (p1 | (p2 / p3)) +
plot_layout(guides="collect")
ggsave(glue("{figdir}/MOFA_{split}_factorID_factor{f}.pdf"), plot=full_pl, width = 15, height = 10)
}
for (f in 1:mofa_trained@dimensions$K){
print(paste0("Saving ID for Factor ", f, "..."))
save_factor_id(mofa_trained, f=f, figdir = figdir)
}
[1] "Saving ID for Factor 1..."
Scale for 'y' is already present. Adding another scale for 'y', which will replace the existing scale.
[1] "Saving ID for Factor 2..."
[1] "Saving ID for Factor 3..."
[1] "Saving ID for Factor 4..."
[1] "Saving ID for Factor 5..."
[1] "Saving ID for Factor 6..."
[1] "Saving ID for Factor 7..."
[1] "Saving ID for Factor 8..."
[1] "Saving ID for Factor 9..."
[1] "Saving ID for Factor 10..."
[1] "Saving ID for Factor 11..."
[1] "Saving ID for Factor 12..."
[1] "Saving ID for Factor 13..."
[1] "Saving ID for Factor 14..."
[1] "Saving ID for Factor 15..."
[1] "Saving ID for Factor 16..."
[1] "Saving ID for Factor 17..."
[1] "Saving ID for Factor 18..."
[1] "Saving ID for Factor 19..."
[1] "Saving ID for Factor 20..."
[1] "Saving ID for Factor 21..."
# save_factor_id(mofa_trained, f=1, figdir = figdir)
# plot_weights(mofa_trained, factors = f, nfeatures = 30, text_size = 3) +
# scale_y_discrete(expand=c(0.1, 0.1))
get_top_weight_genes <- function(mofa_trained, f, n_top=20, which="top"){
w_df <- get_weights(mofa_trained, factors = f, as.data.frame = TRUE) %>%
dplyr::arrange(value)
top_genes <- w_df %>%
dplyr::top_n(n_top, value) %>%
dplyr::pull(feature) %>%
as.character()
bot_genes <- w_df %>%
dplyr::top_n(n_top, -value) %>%
dplyr::pull(feature) %>%
as.character()
if (which=="top") {
genes <- top_genes
} else if (which=="bottom"){
genes <- bot_genes
} else if (which=="both"){
genes <- c(top_genes, bot_genes)
}
return(genes)
}
plot_data_top_weights <- function(mofa_trained, ct, f, n_top=20, which="top"){
genes <- get_top_weight_genes(mofa_trained, f, which=which, n_top=n_top)
data <- get_data(mofa_trained, groups=ct)[[1]][[1]][genes,]
pl_df <- reshape2::melt(data, varnames=c("gene", "sample")) %>%
dplyr::left_join(samples_metadata(mofa_trained)) %>%
dplyr::arrange(age) %>%
dplyr::mutate(sample=factor(sample, levels=unique(sample))) %>%
dplyr::group_by(gene) %>%
dplyr::mutate(value=scale(value))
pl_df %>%
ggplot(aes(sample, gene, fill=value)) +
geom_tile() +
facet_grid(.~organ, space="free", scales="free") +
scale_fill_gradient2(high="red", low="blue", name="Scaled\nexpression") +
xlab("----age--->") + ylab(glue("{which} weight genes")) +
theme_bw(base_size=16) +
theme(axis.ticks.x = element_blank(), axis.text.x = element_blank()) +
ggtitle(glue('{ct} - {f}'))
}
for (g in all_groups){
fs <- get_top_factor_per_celltype(mofa_trained, g, min_R2=2)
fs <- fs[!fs %in% exclude_factors]
if (length(fs) > 0){
top_plots <- lapply(fs, function(x) (plot_data_top_weights(mofa_trained, g, x, which="top") + remove_x_axis()) /
plot_data_top_weights(mofa_trained, g, x, which="bottom") + ggtitle("")
)
full_pl <-wrap_plots(top_plots, ncol=1)
ggsave(glue("{figdir}/top_factors_expr_{g}.pdf"),plot=full_pl, width=12, height = 10*length(top_plots))
}
}
get_variance_explained(mofa_trained, factors = 5, as.data.frame = TRUE)[[1]] %>%
dplyr::filter(factor=="Factor5") %>%
dplyr::arrange(value) %>%
dplyr::mutate(group=factor(group, levels=unique(group))) %>%
# dplyr::filter(value > 1) %>%
ggplot(aes(group, value)) +
geom_col() +
coord_flip() +
geom_hline(yintercept = 2, color="red", linetype=2) +
ylab("% Var. explained")
get_variance_explained(mofa_trained, factors = 5, as.data.frame = TRUE)[[1]] %>%
dplyr::filter(group=="CD16+_MACROPHAGE") %>%
dplyr::filter(!factor %in% exclude_factors) %>%
dplyr::arrange(value) %>%
dplyr::mutate(factor=factor(factor, levels=unique(factor))) %>%
# dplyr::filter(value > 1) %>%
ggplot(aes(factor, value)) +
geom_col() +
coord_flip() +
geom_hline(yintercept = 2, color="red", linetype=2) +
ylab("% Var. explained")
#
# gs <- dplyr::filter(gr_top_factors, top_factors=="Factor4") %>% dplyr::pull(group)
# groups = gs[!gs %in% c("EO_BASO_MAST", "PRE_PRO_B", "DC2")]
#
# plot_data_heatmap(mofa_trained, factor = 4, groups = c("NEUTROPHIL"), show_colnames=FALSE, annotation_samples = 'organ', annotation_colors=list(organ=org_colors), features = 50)
# BiocManager::install("MOFAdata")
library(MOFAdata)
utils::data(reactomeGS)
head(rownames(reactomeGS))
## Remove row with NA
reactomeGS <- reactomeGS[!is.na(rownames(reactomeGS)),]
BiocManager::install('ensembldb')
library(EnsDb.Hsapiens.v86)
hg.pairs <- readRDS(system.file("exdata", "human_cycle_markers.rds", package="scran"))
all_genes <- ensembldb::genes(EnsDb.Hsapiens.v86)
detach(package:EnsDb.Hsapiens.v86)
detach(package:ensembldb)
# gene_name_2_id <- function(gene){
# return(all_genes[all_genes$gene_name==gene,]$gene_id[1])
# }
#
# gene_ids <- sapply(mofa_trained@features_metadata$feature, gene_name_2_id)
# rowData(sce)["gene_id"] <- gene_ids
# rowData(sce)["gene_name"] <- rownames(sce)
gene_names_reactome <- all_genes[colnames(reactomeGS)]$gene_name
colnames(reactomeGS) <- gene_names_reactome
Subset to genes tested
reactomeGS_universe <- reactomeGS[, colnames(reactomeGS) %in% mofa_trained@features_metadata$feature]
# GSEA on positive weights, with default options
res.positive <- run_enrichment(mofa_trained,
view='corrected_logcounts',
# statistical.test = 'cor.adj.parametric',
feature.sets = reactomeGS_universe,
sign = "positive",
)
# GSEA on negative weights, with default options
res.negative <- run_enrichment(mofa_trained,
view='scaled_logcounts',
# statistical.test = 'cor.adj.parametric',
feature.sets = reactomeGS_universe,
sign = "negative"
)
for (f in 1:mofa_trained@dimensions$K){
if (min(res.positive$pval.adj[,paste0("Factor", f)]) < 0.1) {
print(plot_enrichment(res.positive, factor = f, alpha=0.1) + ggtitle("Positive weights") +
plot_enrichment(res.negative, factor = f, alpha=0.1) + ggtitle("Negative weights") +
plot_annotation(title=paste0("Factor", f)))
}
}
signif_pathways <- rownames(data.frame(res.negative$pval.adj))[order(data.frame(res.negative$pval.adj)[["Factor8"]])[0:10]]
colnames(reactomeGS_universe)[reactomeGS_universe[signif_pathways[5],]==1]
plot_enrichment_detailed(res.negative, factor = 8)
| #### KNN graph per celltype |
| ```r ## Get factors that explain most variance in each celltype get_top_factor_per_celltype <- function(mofa_trained, gr, min_R2=2){ get_variance_explained(mofa_trained, as.data.frame = TRUE)[[1]] %>% dplyr::filter(group==gr) %>% dplyr::filter(value >= min_R2) %>% dplyr::pull(factor) %>% as.character() } |
| ## Make KNN graph based on similarity of top factors for each celltype get_ct_KNN_graph <- function(mofa_trained, gr, min_R2=5, k=5){ ## Get factors that explain most variance per celltype fs <- get_top_factor_per_celltype(mofa_trained, gr, min_R2 = min_R2) |
| ## Exclude technical factors fs <- fs[!fs %in% exclude_factors] |
| ## Make KNN graph from top factors Z <- get_factors(mofa_trained, groups=gr, factors = fs)[[1]] knn_ct <- buildKNNGraph(t(Z), k=k) |
| ## Add attributes metadata_ct <- samples_metadata(mofa_trained)[rownames(Z),] # covariates V(knn_ct)\(organ <- metadata_ct\)organ V(knn_ct)\(age <- metadata_ct\)age V(knn_ct)\(n_cells <- metadata_ct\)n_cells V(knn_ct)\(method <- metadata_ct\)method V(knn_ct)\(donor <- metadata_ct\)donor # top factors for (c in colnames(Z)){ vertex_attr(knn_ct)[[c]] <- Z[,c] } |
| return(knn_ct) } |
| ## Plot KNN graph plot_ct_KNN_graph <- function(knn, color_by=“organ”){ ## Define color if (!color_by %in% names(vertex_attr(knn))){ stop(“specified color_by variable is not in vertex_attr(knn)”) } |
| if (color_by==“organ”){ scale_color_knngraph <- scale_color_manual(values=org_colors) } else if (is.numeric(vertex_attr(knn, color_by))){ scale_color_knngraph <- scale_color_viridis_c(option=“magma”) } else { scale_color_knngraph <- scale_color_discrete() } |
| vertex_attr(knn, “color_by”) <- vertex_attr(knn, color_by) |
| ggraph(knn) + geom_edge_link0() + geom_node_point(aes(color=color_by, size=n_cells)) + theme(panel.background = element_blank()) + scale_color_knngraph + scale_size(range=c(2,7)) } |
| knn_graph_pl <- lapply(all_groups, function(g){ knn <- get_ct_KNN_graph(mofa_trained, g, k=5, min_R2 = 2) plot_ct_KNN_graph(knn, color_by = ‘organ’) + ggtitle(g) }) |
| knn_graph_pl ``` |
| ```r ## Score connectivity between samples from the same organ .calc_connectivity_score <- function(knn, o){ adj <- get.adjacency(knn) n_org <- sum(V(knn)\(organ==o) n_other <- sum(V(knn)\)organ!=o) within_edges <- sum(adj[V(knn)\(organ==o,V(knn)\)organ==o]) between_edges <- sum(adj[V(knn)\(organ==o,V(knn)\)organ!=o]) score <- (within_edges/between_edges)*(n_other/n_org) return(score) } |
| ## Calculate connectivity score for permutations of node labels conn_score_test <- function(knn, o, n_perm=1000){ real_score <- .calc_connectivity_score(knn, o) ## Random permutations rand_scores <- c() for (i in 1:n_perm){ rand_knn <- knn V(rand_knn)\(organ <- sample(V(knn)\)organ) rand_scores <- c(rand_scores, .calc_connectivity_score(rand_knn, o)) } |
| p_val <- sum(c(rand_scores, real_score) >= real_score)/(n_perm + 1) if (p_val < 2e-16){ p_val <- 2e-16} return(c(‘score’=real_score,‘p_value’=p_val)) } |
| ## Calculate connectivity score + significance with permutation test test_conn_group <- function(mofa_trained, g, k=5, min_R2 = 2, n_perm=1000){ knn <- get_ct_KNN_graph(mofa_trained, g, k=k, min_R2 = min_R2) test_orgs <- names(table(V(knn)\(organ))[table(V(knn)\)organ) > 2] return(sapply(test_orgs, function(o) conn_score_test(knn, o, n_perm=n_perm))) } |
| ## Test in widespread cell types connectivity_test_ls <- lapply(anno_order[1:10], function(g) test_conn_group(mofa_trained, g)) connectivity_test_ls <- setNames(connectivity_test_ls, anno_order[1:10]) |
| connectivity_test_df <- imap(connectivity_test_ls, ~ data.frame(t(.x)) %>% tibble::rownames_to_column(“organ”) %>% dplyr::mutate(group=.y)) %>% purrr::reduce(dplyr::bind_rows) %>% dplyr::mutate(is_signif = ifelse(p_value < 0.01, TRUE, FALSE)) |
| connectivity_test_df %>% ggplot(aes(organ, group,fill=log10(score))) + geom_tile() + scale_fill_distiller(palette=“Reds”, direction = 1) + geom_text(data=. %>% dplyr::filter(is_signif), label="*", size=5) |
| ``` |
r connectivity_test_df %>% dplyr::group_by(group) %>% dplyr::mutate(mean_val=median(score)) %>% dplyr::ungroup() %>% dplyr::arrange(-mean_val) %>% dplyr::mutate(group=factor(group, levels=unique(group))) %>% ggplot(aes(organ, log1p(score))) + geom_col(fill="grey") + geom_col(data=. %>% dplyr::filter(is_signif), aes(fill=organ)) + scale_fill_manual(values=org_colors) + coord_flip() + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0)) |
–> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –> –>
–> –> –> –> –>
–> –> –>